This is an R markdown document illustrating the bioinformatic methods used to generate the results for the manuscript by Arsenault et al. entitled “Simple inheritance, complex regulation: supergene-mediated fire ant queen polymorphism”. If you have any questions about this document, please direct them to Sam Arsenault at samarsenault93@gmail.com or Brendan G. Hunt at brendan.hunt@gmail.com
library(edgeR)
library(UpSetR)
library(readr)
library(magick)
library(gridExtra)
library(karyoploteR)
library(cowplot)
library(ggplot2)
library(topGO)
library(data.table)
library(eulerr)
library(pheatmap)
library(CircStats)
library(RColorBrewer)
library(randtests)
library(readxl)
make_polar_DEG <- function(TT_table,radial_lim) {
## Things to Add:
library(ggplot2)
# library(gridExtra)
library(cowplot)
internal <- TT_table
internal$neglogFDR <- -log10(TT_table$FDR)
internal$angle <- atan2(TT_table$logFC.GenotypeBb,TT_table$logFC.Genotypebb)
internal$abslogBb <- abs(TT_table$logFC.GenotypeBb)
internal$abslogbb <- abs(TT_table$logFC.Genotypebb)
internal$length <- internal[,c("abslogBb","abslogbb")][cbind(seq_len(nrow(internal)), max.col(internal[,c("abslogBb","abslogbb")]))]
d=data.frame(angle=c(atan2(1,2),atan2(1,1),atan2(2,1),atan2(1,0),atan2(1,-1),atan2(0,-1),atan2(-1,-2),atan2(-1,-1),atan2(-2,-1),atan2(-1,0),atan2(-1,1),atan2(0,1)),
category=c("bb>Bb>BB", "bb=Bb>BB", "Bb>bb>BB", "Bb>BB=bb", "Bb>BB>bb", "Bb=BB>bb","BB>Bb>bb","BB>Bb=bb","BB>bb>Bb","BB=bb>Bb","bb>BB>Bb","bb>Bb=BB"))
gg_scatter <- ggplot() +
geom_point(data=internal, aes(x=angle, y=neglogFDR),shape=1) +
theme_bw() +
theme(axis.text.x=element_blank()) +
coord_polar(theta="x",clip="off") +
scale_x_continuous(breaks = seq(-pi, pi, pi/3), limits = c(-pi, pi)) +
scale_y_continuous(breaks = seq(0,radial_lim,5),limits = c(0,radial_lim)) +
geom_vline(xintercept = c(atan2(1,2),atan2(1,1),atan2(2,1),atan2(1,0),atan2(1,-1),atan2(0,-1),atan2(-1,-2),atan2(-1,-1),atan2(-2,-1),atan2(-1,0),atan2(-1,1),atan2(0,1))) +
geom_histogram(d=internal,aes(x=angle,y=10*..ncount..),fill="green", alpha=0.3,bins=60) +
geom_text(data=d, mapping=aes(x=angle, y=radial_lim, label=category), size=4, angle=0, vjust=0, hjust=0.5)
return(gg_scatter)
} ## Make the Polar coordinate Dominance Plot
make_volcano <- function(et,Title) {
library(ggplot2)
resFilt <- topTags(et, n=nrow(et$table))
volcanoData <- cbind(resFilt$table$logFC, -log10(resFilt$table$FDR))
colnames(volcanoData) <- c("logFC", "negLogPval")
volcanoData <- data.frame(volcanoData)
volcanoData$Sig <- volcanoData$negLogPval > log10(0.01)*(-1)
x_bounds = round(max(abs(volcanoData$logFC)))
y_bound = round(volcanoData$negLogPval)
# first set up the plot
volcPlot <- ggplot(volcanoData,aes(logFC,negLogPval))
# then add the points
volcPlot <- volcPlot + geom_point(aes(colour = Sig),alpha=0.4, size=2)
# volcPlot <- volcPlot + geom_hline(yintercept = log10(0.01)*(-1),size=1) + geom_vline(xintercept = -1,size=1) + geom_vline(xintercept = 1,size=1) + geom_vline(xintercept = 0,size=1) +
volcPlot <- volcPlot + geom_hline(yintercept = log10(0.01)*(-1),size=1) + geom_vline(xintercept = 0,size=1) +
labs(x = "logFC", y = "-log10(P-value)",title=Title) +
theme(legend.position="none", axis.title.x = element_text(face="bold", size=16), axis.text.x = element_text(size=12), axis.title.y = element_text(face="bold", size=16), axis.text.y = element_text(size=12)) +
scale_x_continuous(limits=c((-1)*x_bounds,x_bounds),breaks=c((-1)*x_bounds,-1,0,1,x_bounds)) +
# scale_y_continuous(limits=c(0,y_bound)) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(colour = "black"),plot.title = element_text(hjust = 0.5))
test <- topTags(et,p.value = 0.01,n=100000)
length(test$table$logFC)
LO <- sum(test$table$logFC < (-1))
LI <- sum((test$table$logFC < 0)&(test$table$logFC >= (-1)))
RI <- sum((test$table$logFC <= 1)&(test$table$logFC > 0))
RO <- sum(test$table$logFC > 1)
print(LO)
print(LI)
print(RI)
print(RO)
return(volcPlot)
} ## Create a standardized volcano plot for a given edgeR QLFtest output.
generate_GO_table <- function(data,Name) {
QLF_TT_all <- topTags(data,p.value = 1,n=Inf)$table
QLF_TT_sig <- topTags(data,p.value = 0.01,n=Inf)$table
## Load in custom annotation
sinv_GO <- readMappings("~/Desktop/Sinv_Analyses/NCBI_genome/NCBI_lg_gene.annot.map", sep = "\t", IDsep=",")
## Load Specific Gene List
myInterestingGenes <- row.names(QLF_TT_sig)
## Load Full Gene Set
geneNames <- row.names(QLF_TT_all)
## Create geneList Structure
geneList <- factor(as.integer(geneNames %in% myInterestingGenes))
names(geneList) <- geneNames
## Prep for file output
output_file = Name
### Run for BP
## Laod the data into a topGO data object (BP,MF,CC)
GOData <- new("topGOdata",ontology="BP",allGenes=geneList,description="GO analysis of DEGs with q<0.01", annot = annFUN.gene2GO, gene2GO = sinv_GO)
## Compute GO term enrichment
resultClassic <- runTest(GOData, algorithm="classic", statistic="fisher")
resultElim <- runTest(GOData, algorithm="elim", statistic="fisher")
resultTopgo <- runTest(GOData, algorithm="weight01", statistic="fisher")
allGO = usedGO(object = GOData)
allRes <- GenTable(GOData, classicFisher = resultClassic, elimFisher = resultElim, topgoFisher = resultTopgo, orderBy = "topgoFisher", ranksOf = "topgoFisher", topNodes = length(allGO))
aa <- sapply(unname(sapply(allRes$GO.ID, function(x) {genes<-genesInTerm(GOData, x); genes[[1]][sapply(genes[[1]],function(id) id %in% myInterestingGenes)]})),paste0,collapse=",")
allRes$genes <- aa
res_print <- allRes[(allRes$topgoFisher<=0.05),c(1:5,8)]
output_file4 = paste(output_file,"TopGO_BP_sig.pdf", sep="_")
pdf(file = output_file4,width = 10)
plot.new()
grid.table(res_print,rows=NULL)
dev.off()
GOData <- new("topGOdata",ontology="MF",allGenes=geneList,description="GO analysis of DEGs with q<0.01", annot = annFUN.gene2GO, gene2GO = sinv_GO)
## Compute GO term enrichment
resultClassic <- runTest(GOData, algorithm="classic", statistic="fisher")
resultElim <- runTest(GOData, algorithm="elim", statistic="fisher")
resultTopgo <- runTest(GOData, algorithm="weight01", statistic="fisher")
allGO = usedGO(object = GOData)
allRes <- GenTable(GOData, classicFisher = resultClassic, elimFisher = resultElim, topgoFisher = resultTopgo, orderBy = "topgoFisher", ranksOf = "topgoFisher", topNodes = length(allGO))
aa <- sapply(unname(sapply(allRes$GO.ID, function(x) {genes<-genesInTerm(GOData, x); genes[[1]][sapply(genes[[1]],function(id) id %in% myInterestingGenes)]})),paste0,collapse=",")
allRes$genes <- aa
res_print <- allRes[(allRes$topgoFisher<=0.05),c(1:5,8)]
output_file4 = paste(output_file,"TopGO_MF_sig.pdf", sep="_")
pdf(file = output_file4,width = 10)
plot.new()
grid.table(res_print,rows=NULL)
dev.off()
GOData <- new("topGOdata",ontology="CC",allGenes=geneList,description="GO analysis of DEGs with q<0.01", annot = annFUN.gene2GO, gene2GO = sinv_GO)
## Compute GO term enrichment
resultClassic <- runTest(GOData, algorithm="classic", statistic="fisher")
resultElim <- runTest(GOData, algorithm="elim", statistic="fisher")
resultTopgo <- runTest(GOData, algorithm="weight01", statistic="fisher")
allGO = usedGO(object = GOData)
allRes <- GenTable(GOData, classicFisher = resultClassic, elimFisher = resultElim, topgoFisher = resultTopgo, orderBy = "topgoFisher", ranksOf = "topgoFisher", topNodes = length(allGO))
aa <- sapply(unname(sapply(allRes$GO.ID, function(x) {genes<-genesInTerm(GOData, x); genes[[1]][sapply(genes[[1]],function(id) id %in% myInterestingGenes)]})),paste0,collapse=",")
allRes$genes <- aa
res_print <- allRes[(allRes$topgoFisher<=0.05),c(1:5,8)]
output_file4 = paste(output_file,"TopGO_CC_sig.pdf", sep="_")
pdf(file = output_file4,width = 10)
plot.new()
grid.table(res_print,rows=NULL)
dev.off()
return()
} ## Compute and Print tables of GO term enrichment values
convert_LOC <- function(dataframe) {
library(rtracklayer)
library(rentrez)
Si_gnG_Ensembl <- readGFF(file="~/Downloads/Solenopsis_invicta.Si_gnG.41.gff3")
Si_gnG_refseq <- readGFF(file="/Users/samarsenault/Desktop/Sinv_Analyses/NCBI_genome/GCF_000188075.1_Si_gnG/GCF_000188075.1_Si_gnG_genomic.gff")
refseq_translation <- data.frame(Si_gnG_refseq[(Si_gnG_refseq$type=="gene")|(Si_gnG_refseq$type=="pseudogene"),c("ID","Name")])
Ensembl_translation <- data.frame(Si_gnG_Ensembl[(Si_gnG_Ensembl$type=="gene")|(Si_gnG_Ensembl$type=="pseudogene"),c("ID","description")])
Ensembl_translation$ID <- gsub("gene:", "", Ensembl_translation$ID)
merged_translation <- merge(x=refseq_translation,y=Ensembl_translation,by.x="Name",by.y="ID")
g <- merge(x=dataframe,y=merged_translation,by.y="ID",by.x=0,all.x=TRUE)
for (i in 1:nrow(g)){
if (!is.na(g$Name[i])&is.na(g$description[i])){
search_term <- gsub(pattern = "LOC",replacement = "",x = g$Name[i])
g$description[i] <- entrez_summary(db="gene",id = search_term)[3]
}
}
return(g)
} ## Add descriptions and proper gene names (LOC) to the data.frame
generate_heatmap_frame <- function(gene_names){
library(karyoploteR)
temp <- Sinv_annot[(row.names(Sinv_annot) %in% gene_names),]
temp.sig<-temp[(grepl("lg",temp$chr)),]
temp.sig <- temp.sig[complete.cases(temp.sig), ]
DEG_GR <- makeGRangesFromDataFrame(temp.sig, seqnames.field = "chr",start.field = "start",end.field = "end",keep.extra.columns = TRUE)
windows <- unlist(tileGenome(stats::setNames(kp$chromosome.lengths, kp$chromosomes),tilewidth = 2e6))
dens <- countOverlaps(windows, DEG_GR)
values(windows) <- data.frame(y = dens)
return(windows)
} ## Takes a list of gene names and makes a heatmap object to pass to karyoploteR
generate_heatmap_frame_SNP <- function(SNP_data){
library(karyoploteR)
DEG_GR <- makeGRangesFromDataFrame(SNP_data, seqnames.field = "lg",start.field = "newstart",end.field = "newend",keep.extra.columns = TRUE)
windows <- unlist(tileGenome(stats::setNames(kp$chromosome.lengths, kp$chromosomes),tilewidth = 500000))
dens <- countOverlaps(windows, DEG_GR)
values(windows) <- data.frame(y = dens)
return(windows)
} ## Takes a list of SNPs and makes a heatmap object to pass to karyoploteR
check_supergene_presence_bias <- function(all_genes_TT_table){
all_genes_TT_table_merge <- merge(all_genes_TT_table,Sinv_annot_lg,by.x=0,by.y="X4",all.x=TRUE)
supergene_genes_int <- all_genes_TT_table_merge[((all_genes_TT_table_merge$X1=="lg16")&(all_genes_TT_table_merge$X2>=7311525)&(all_genes_TT_table_merge$X3<=18123987)),c("Row.names")]
supergene_genes_int <- supergene_genes_int[!is.na(supergene_genes_int)]
nonsuper_genes_int <- all_genes_TT_table_merge[((grepl(pattern = "lg",x = all_genes_TT_table_merge$X1))&(all_genes_TT_table_merge$Row.names %!in% supergene_genes_int)),c("Row.names")]
in_super_sig <- all_genes_TT_table[((row.names(all_genes_TT_table) %in% supergene_genes_int)&(all_genes_TT_table$FDR<=0.01)),]
in_super_not <- all_genes_TT_table[((row.names(all_genes_TT_table) %in% supergene_genes_int)&(all_genes_TT_table$FDR>0.01)),]
out_super_sig <- all_genes_TT_table[((row.names(all_genes_TT_table) %in% nonsuper_genes_int)&(all_genes_TT_table$FDR<=0.01)),]
out_super_not <- all_genes_TT_table[((row.names(all_genes_TT_table) %in% nonsuper_genes_int)&(all_genes_TT_table$FDR>0.01)),]
out <- c(nrow(in_super_sig),nrow(in_super_not),nrow(out_super_sig),nrow(out_super_not),nrow(all_genes_TT_table))
print(out[1:4])
print(sum(out[1:4])-out[5])
chi_out <- chisq.test(matrix(out[1:4],nrow=2),correct = FALSE)
print(chi_out$observed)
print(chi_out$expected)
print(chi_out$p.value)
print(fisher.test(x=matrix(out[1:4],nrow=2),alternative = "two.sided"))
return()
} ## Takes a full TopTags file (all p-values) and checks for an overrepresentation of significant DEGs in the supergene as opposed to outside of it.
check_supergene_dir_bias <- function(sig_genes_TT_table){
sig_genes_TT_table_merge <- merge(sig_genes_TT_table,Sinv_annot_lg,by.x=0,by.y="X4",all.x=TRUE)
supergene_genes_int <- sig_genes_TT_table_merge[((sig_genes_TT_table_merge$X1=="lg16")&(sig_genes_TT_table_merge$X2>=7311525)&(sig_genes_TT_table_merge$X3<=18123987)),c("Row.names")]
supergene_genes_int <- supergene_genes_int[!is.na(supergene_genes_int)]
nonsuper_genes_int <- sig_genes_TT_table_merge[((grepl(pattern = "lg",x = sig_genes_TT_table_merge$X1))&(sig_genes_TT_table_merge$Row.names %!in% supergene_genes_int)),c("Row.names")]
in_super_up <- sig_genes_TT_table[((row.names(sig_genes_TT_table) %in% supergene_genes_int)&(sig_genes_TT_table$logFC>0)),]
in_super_down <- sig_genes_TT_table[((row.names(sig_genes_TT_table) %in% supergene_genes_int)&(sig_genes_TT_table$logFC<0)),]
out_super_up <- sig_genes_TT_table[((row.names(sig_genes_TT_table) %in% nonsuper_genes_int)&(sig_genes_TT_table$logFC>0)),]
out_super_down <- sig_genes_TT_table[((row.names(sig_genes_TT_table) %in% nonsuper_genes_int)&(sig_genes_TT_table$logFC<0)),]
out <- c(nrow(in_super_up),nrow(in_super_down),nrow(out_super_up),nrow(out_super_down),nrow(sig_genes_TT_table))
print(out[1:4])
print(sum(out[1:4])-out[5])
chi_out <- chisq.test(matrix(out[1:4],nrow=2),correct = FALSE)
print(chi_out$observed)
print(chi_out$expected)
print(chi_out$p.value)
print(fisher.test(x=matrix(out[1:4],nrow=2),alternative = "two.sided"))
return()
} ## Takes a table of significant DEGs and assesses whether or not there is a directional bias of differential expression within versus outside of the supergene.
'%!in%' <- function(x,y)!('%in%'(x,y)) ## Quality of life function
## Load in the gene lengths computed from the annotation for RPKM calculation
len_file=read.delim(file="/Users/samarsenault/Desktop/Sinv_Analyses/Sinv_NCBI_counts/Genelength.tsv",sep="\t")
head(len_file)
## GeneID Length
## 1 gene0 1839
## 2 gene1 894
## 3 gene2 2253
## 4 gene3 870
## 5 gene4 76
## 6 gene5 972
## Load in the counts file generated using the 2-pass method from STAR
x <- read.delim("/Users/samarsenault/Desktop/Sinv_Analyses/Sinv_NCBI_counts/NCBI_All_counts.tsv",row.names=1,sep="\t")
head(x)
## lib_104AB lib_104AO lib_104DB lib_104GB lib_104GO lib_107AB
## gene0 0 27 0 9 27 0
## gene1 0 0 0 0 0 0
## gene2 1 16 0 0 7 0
## gene3 23142 2727 21787 22937 2385 10573
## gene4 0 0 0 0 0 0
## gene5 14 4 0 8 4 4
## lib_107AO lib_107EB lib_107EO lib_107GB lib_107GO lib_15AB lib_15AO
## gene0 24 0 9 0 10 0 10
## gene1 0 0 0 0 0 0 0
## gene2 24 29 10 88 8 41 4
## gene3 3340 15382 4155 17307 4557 6960 7256
## gene4 0 0 0 0 0 0 0
## gene5 3 15 1 20 0 12 0
## lib_15DB lib_15DO lib_15GB lib_15GO lib_16CB lib_16CO lib_16DB
## gene0 0 20 0 0 0 0 0
## gene1 0 0 0 0 0 0 0
## gene2 5 14 0 16 27 25 0
## gene3 5436 3270 15016 3305 1937 2446 7587
## gene4 0 0 0 0 0 0 0
## gene5 2 1 2 1 34 0 25
## lib_16DO lib_16GB lib_16GO lib_19AB lib_19AO lib_19DB lib_19DO
## gene0 21 0 3 0 8 0 0
## gene1 0 0 0 0 0 0 0
## gene2 9 0 11 20 3 38 8
## gene3 2199 6104 3032 13182 3015 12381 2358
## gene4 0 0 0 0 0 0 0
## gene5 0 6 0 25 1 82 0
## lib_19GB lib_19GO lib_1EB lib_1EO lib_207AB lib_207AO lib_209AB
## gene0 0 6 2 57 42 5 0
## gene1 0 0 0 0 0 0 0
## gene2 3 12 5 15 71 2 41
## gene3 13280 7304 904 1416 11555 3013 16333
## gene4 0 0 0 0 0 0 0
## gene5 25 0 48 3 73 0 24
## lib_209AO lib_20AB lib_20AO lib_20DB lib_20DO lib_20IB lib_20IO
## gene0 0 5 13 0 2 0 14
## gene1 0 0 0 0 0 0 0
## gene2 9 86 11 24 11 2 11
## gene3 4266 9281 3669 24916 4310 17208 4228
## gene4 0 0 0 0 0 0 0
## gene5 0 35 0 19 0 23 0
## lib_222AB lib_222AO lib_232AB lib_232AO lib_233AB lib_233AO
## gene0 0 5 0 4 0 4
## gene1 0 0 0 0 0 0
## gene2 26 8 44 10 0 4
## gene3 31449 3370 10125 3138 12777 3220
## gene4 0 0 0 0 0 0
## gene5 13 0 9 0 36 0
## lib_235AB lib_235AO lib_239AB lib_239AO lib_240AB lib_240AO lib_30AB
## gene0 0 0 0 4 0 0 2
## gene1 0 0 0 0 0 0 0
## gene2 24 5 0 9 0 5 267
## gene3 12547 5417 25319 4123 17585 3988 5901
## gene4 0 0 0 0 0 0 0
## gene5 20 0 1 0 15 0 6
## lib_30AO lib_30EB lib_30EO lib_30GB lib_30GO lib_5AB lib_5AO lib_5GB
## gene0 13 0 3 0 1 0 9 10
## gene1 0 0 0 0 0 0 0 0
## gene2 2 5 8 89 8 44 11 15
## gene3 2905 7458 4653 11744 2184 4405 3841 5980
## gene4 0 0 0 0 0 0 0 0
## gene5 0 11 0 12 0 35 0 18
## lib_5GO
## gene0 8
## gene1 0
## gene2 10
## gene3 2152
## gene4 0
## gene5 0
## Load in the model matrix containing information formatted for both pairiwse and GLM comparisons
adv_group <- read.delim("/Users/samarsenault/Desktop/Sinv_Analyses/Sinv_model_matrix5.tsv",row.names = 1,sep = "\t")
head(adv_group)
## Individual_ID Colony_ID Tissue Social.Form Genotype
## 104AB 104A 104 Brain Polygyne Bb
## 104AO 104A 104 Ovary Polygyne Bb
## 104DB 104D 104 Brain Polygyne BB
## 104GB 104G 104 Brain Polygyne bb
## 104GO 104G 104 Ovary Polygyne bb
## 107AB 107A 107 Brain Polygyne Bb
## Alt_ID
## 104AB Brain.Polygyne.Bb
## 104AO Ovary.Polygyne.Bb
## 104DB Brain.Polygyne.BB
## 104GB Brain.Polygyne.bb
## 104GO Ovary.Polygyne.bb
## 107AB Brain.Polygyne.Bb
## Load in the annotation information from a bed file
Sinv_annot <- read_delim("~/Desktop/Sinv_Analyses/NCBI_genome/Genetic_Mapping/NCBI_lg_gene_v3.bed",
"\t", escape_double = FALSE, col_names = FALSE,
trim_ws = TRUE)
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_double(),
## X3 = col_double(),
## X4 = col_character()
## )
row.names(Sinv_annot) <- Sinv_annot$X4 ## Rename the rows of the this variable with gene names
## Warning: Setting row names on a tibble is deprecated.
colnames(Sinv_annot) <- c("chr","start","end","gene") ## rename the columns with better description
head(Sinv_annot)
## # A tibble: 6 x 4
## chr start end gene
## <chr> <dbl> <dbl> <chr>
## 1 NC_014672.1 10229 11174 gene15723
## 2 NC_014672.1 14342 15323 gene15724
## 3 NC_014672.1 4157 4507 gene15717
## 4 NC_014672.1 4929 6596 gene15718
## 5 NC_014672.1 8466 8996 gene15721
## 6 NC_014672.1 9021 10142 gene15722
## Load in the linkage mapped genome annotations
Sinv_annot_lg <- read_delim("~/Desktop/Sinv_Analyses/NCBI_genome/Genetic_Mapping/NCBI_lg_gene_v4.bed",
"\t", escape_double = FALSE, col_names = FALSE,
trim_ws = TRUE)
## Parsed with column specification:
## cols(
## X1 = col_character(),
## X2 = col_double(),
## X3 = col_double(),
## X4 = col_character()
## )
y_pw <- DGEList(x,group = adv_group$Alt_ID)
y_pw$samples
## group lib.size norm.factors
## lib_104AB Brain.Polygyne.Bb 23839271 1
## lib_104AO Ovary.Polygyne.Bb 21048826 1
## lib_104DB Brain.Polygyne.BB 23454386 1
## lib_104GB Brain.Polygyne.bb 24389394 1
## lib_104GO Ovary.Polygyne.bb 20495668 1
## lib_107AB Brain.Polygyne.Bb 20141694 1
## lib_107AO Ovary.Polygyne.Bb 17197423 1
## lib_107EB Brain.Polygyne.BB 21829149 1
## lib_107EO Ovary.Polygyne.BB 19294902 1
## lib_107GB Brain.Polygyne.bb 25168093 1
## lib_107GO Ovary.Polygyne.bb 16074944 1
## lib_15AB Brain.Polygyne.Bb 14960249 1
## lib_15AO Ovary.Polygyne.Bb 16752710 1
## lib_15DB Brain.Polygyne.BB 13227223 1
## lib_15DO Ovary.Polygyne.BB 19369746 1
## lib_15GB Brain.Polygyne.bb 15342322 1
## lib_15GO Ovary.Polygyne.bb 15462774 1
## lib_16CB Brain.Polygyne.Bb 10874244 1
## lib_16CO Ovary.Polygyne.Bb 18167411 1
## lib_16DB Brain.Polygyne.BB 18118617 1
## lib_16DO Ovary.Polygyne.BB 15154985 1
## lib_16GB Brain.Polygyne.bb 14252480 1
## lib_16GO Ovary.Polygyne.bb 11697881 1
## lib_19AB Brain.Polygyne.Bb 21454629 1
## lib_19AO Ovary.Polygyne.Bb 14055961 1
## lib_19DB Brain.Polygyne.BB 22242852 1
## lib_19DO Ovary.Polygyne.BB 12250091 1
## lib_19GB Brain.Polygyne.bb 22026200 1
## lib_19GO Ovary.Polygyne.bb 18419244 1
## lib_1EB Brain.Polygyne.BB 9465077 1
## lib_1EO Ovary.Polygyne.BB 15489624 1
## lib_207AB Brain.Monogyne.BB 23545677 1
## lib_207AO Ovary.Monogyne.BB 13189348 1
## lib_209AB Brain.Monogyne.BB 25094494 1
## lib_209AO Ovary.Monogyne.BB 18649952 1
## lib_20AB Brain.Polygyne.Bb 20937228 1
## lib_20AO Ovary.Polygyne.Bb 19166278 1
## lib_20DB Brain.Polygyne.BB 21474982 1
## lib_20DO Ovary.Polygyne.BB 20286421 1
## lib_20IB Brain.Polygyne.bb 23393220 1
## lib_20IO Ovary.Polygyne.bb 19737048 1
## lib_222AB Brain.Monogyne.BB 26950035 1
## lib_222AO Ovary.Monogyne.BB 18315290 1
## lib_232AB Brain.Monogyne.BB 15462514 1
## lib_232AO Ovary.Monogyne.BB 17487369 1
## lib_233AB Brain.Monogyne.BB 18101509 1
## lib_233AO Ovary.Monogyne.BB 16506089 1
## lib_235AB Brain.Monogyne.BB 17354499 1
## lib_235AO Ovary.Monogyne.BB 22630544 1
## lib_239AB Brain.Monogyne.BB 23630798 1
## lib_239AO Ovary.Monogyne.BB 19564509 1
## lib_240AB Brain.Monogyne.BB 19079343 1
## lib_240AO Ovary.Monogyne.BB 18214783 1
## lib_30AB Brain.Polygyne.Bb 18465236 1
## lib_30AO Ovary.Polygyne.Bb 13595740 1
## lib_30EB Brain.Polygyne.BB 13622038 1
## lib_30EO Ovary.Polygyne.BB 18007543 1
## lib_30GB Brain.Polygyne.bb 16763253 1
## lib_30GO Ovary.Polygyne.bb 10097662 1
## lib_5AB Brain.Polygyne.Bb 13352005 1
## lib_5AO Ovary.Polygyne.Bb 11809254 1
## lib_5GB Brain.Polygyne.bb 16318389 1
## lib_5GO Ovary.Polygyne.bb 11915595 1
## Filter out low count genes
keep <- rowSums(cpm(y_pw)>10/9) >= 8 ## NEED TO EDIT!!!
len_file <- len_file[ as.vector(keep) , ]
y_pw <- y_pw[keep, , keep.lib.sizes=FALSE]
## Normalize samples by library size
y_pw <- calcNormFactors(y_pw)
Background.genes <- row.names(y_pw$counts)
## Calculate RPKM values
adv_group$FullID <- paste(adv_group$Individual_ID,adv_group$Alt_ID,sep = ".")
## Begin Computing Differential Expression
design_pw <- model.matrix(~0+adv_group$Alt_ID,data=y_pw$samples)
colnames(design_pw) <- levels(y_pw$samples$group)
y_pw <- estimateDisp(y_pw,design_pw)
y_pw <- estimateGLMCommonDisp(y_pw, design_pw)
y_pw <- estimateGLMTrendedDisp(y_pw, design_pw)
y_pw <- estimateGLMTagwiseDisp(y_pw, design_pw)
## QLM Fit and DE analysis
fit_pw <- glmQLFit(y_pw,design_pw)
my.contrasts <- makeContrasts(Brain_M_v_P_BB=Brain.Monogyne.BB-Brain.Polygyne.BB,
Brain_BB_v_Bb=Brain.Polygyne.BB-Brain.Polygyne.Bb,
Brain_Bb_v_bb=Brain.Polygyne.Bb-Brain.Polygyne.bb,
Brain_mBB_v_pBb=Brain.Monogyne.BB-Brain.Polygyne.Bb,
Brain_mBB_v_pbb=Brain.Monogyne.BB-Brain.Polygyne.bb,
Ovary_M_v_P_BB=Ovary.Monogyne.BB-Ovary.Polygyne.BB,
Ovary_BB_v_Bb=Ovary.Polygyne.BB-Ovary.Polygyne.Bb,
Ovary_Bb_v_bb=Ovary.Polygyne.Bb-Ovary.Polygyne.bb,
Ovary_mBB_v_pBb=Ovary.Monogyne.BB-Ovary.Polygyne.Bb,
Ovary_mBB_v_pbb=Ovary.Monogyne.BB-Ovary.Polygyne.bb,
Brain_v_Ovary_pBB=Brain.Polygyne.BB-Ovary.Polygyne.BB,
Brain_v_Ovary_mBB=Brain.Monogyne.BB-Ovary.Monogyne.BB,
Brain_v_Ovary_pBb=Brain.Polygyne.Bb-Ovary.Polygyne.Bb,
Brain_v_Ovary_pbb=Brain.Polygyne.bb-Ovary.Polygyne.bb,
Ovary_BB_v_bb=Ovary.Polygyne.BB-Ovary.Polygyne.bb,
Brain_BB_v_bb=Brain.Polygyne.BB-Brain.Polygyne.bb,
levels=design_pw)
pw.qlf.Brain_M_v_P_BB <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_M_v_P_BB"])
pw.qlf.Brain_M_v_P_BB.TT <- topTags(pw.qlf.Brain_M_v_P_BB,p.value = 1,n=100000)
pw.qlf.Brain_M_v_P_BB.TT.sig <- topTags(pw.qlf.Brain_M_v_P_BB,p.value = 0.01,n=100000)$table
pw.qlf.Brain_M_v_P_BB.TT.names <- row.names(pw.qlf.Brain_M_v_P_BB.TT$table)
pw.qlf.Ovary_M_v_P_BB <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Ovary_M_v_P_BB"])
pw.qlf.Ovary_M_v_P_BB.TT <- topTags(pw.qlf.Ovary_M_v_P_BB,p.value = 1,n=100000)
pw.qlf.Ovary_M_v_P_BB.TT.sig <- topTags(pw.qlf.Ovary_M_v_P_BB,p.value = 0.01,n=100000)$table
pw.qlf.Ovary_M_v_P_BB.TT.names <- row.names(pw.qlf.Ovary_M_v_P_BB.TT$table)
pw.qlf.Brain_BB_v_Bb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_BB_v_Bb"])
pw.qlf.Brain_BB_v_Bb.TT <- topTags(pw.qlf.Brain_BB_v_Bb,p.value = 1,n=100000)
pw.qlf.Brain_BB_v_Bb.TT.sig <- topTags(pw.qlf.Brain_BB_v_Bb,p.value = 0.01,n=100000)$table
pw.qlf.Brain_BB_v_Bb.TT.names <- row.names(pw.qlf.Brain_BB_v_Bb.TT$table)
pw.qlf.Ovary_BB_v_Bb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Ovary_BB_v_Bb"])
pw.qlf.Ovary_BB_v_Bb.TT <- topTags(pw.qlf.Ovary_BB_v_Bb,p.value = 1,n=100000)
pw.qlf.Ovary_BB_v_Bb.TT.sig <- topTags(pw.qlf.Ovary_BB_v_Bb,p.value = 0.01,n=100000)$table
pw.qlf.Ovary_BB_v_Bb.TT.names <- row.names(pw.qlf.Ovary_BB_v_Bb.TT$table)
pw.qlf.Brain_Bb_v_bb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_Bb_v_bb"])
pw.qlf.Brain_Bb_v_bb.TT <- topTags(pw.qlf.Brain_Bb_v_bb,p.value = 1,n=100000)
pw.qlf.Brain_Bb_v_bb.TT.sig <- topTags(pw.qlf.Brain_Bb_v_bb,p.value = 0.01,n=100000)$table
pw.qlf.Brain_Bb_v_bb.TT.names <- row.names(pw.qlf.Brain_Bb_v_bb.TT$table)
pw.qlf.Ovary_Bb_v_bb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Ovary_Bb_v_bb"])
pw.qlf.Ovary_Bb_v_bb.TT <- topTags(pw.qlf.Ovary_Bb_v_bb,p.value = 1,n=100000)
pw.qlf.Ovary_Bb_v_bb.TT.sig <- topTags(pw.qlf.Ovary_Bb_v_bb,p.value = 0.01,n=100000)$table
pw.qlf.Ovary_Bb_v_bb.TT.names <- row.names(pw.qlf.Ovary_Bb_v_bb.TT$table)
pw.qlf.Brain_v_Ovary_pBB <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_v_Ovary_pBB"])
pw.qlf.Brain_v_Ovary_pBB.TT <- topTags(pw.qlf.Brain_v_Ovary_pBB,p.value = 1,n=100000)
pw.qlf.Brain_v_Ovary_pBB.TT.sig <- topTags(pw.qlf.Brain_v_Ovary_pBB,p.value = 0.01,n=100000)$table
pw.qlf.Brain_v_Ovary_pBB.TT.names <- row.names(pw.qlf.Brain_v_Ovary_pBB.TT$table)
pw.qlf.Brain_v_Ovary_mBB <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_v_Ovary_mBB"])
pw.qlf.Brain_v_Ovary_mBB.TT <- topTags(pw.qlf.Brain_v_Ovary_mBB,p.value = 1,n=100000)
pw.qlf.Brain_v_Ovary_mBB.TT.sig <- topTags(pw.qlf.Brain_v_Ovary_mBB,p.value = 0.01,n=100000)$table
pw.qlf.Brain_v_Ovary_mBB.TT.names <- row.names(pw.qlf.Brain_v_Ovary_mBB.TT$table)
pw.qlf.Brain_v_Ovary_pBb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_v_Ovary_pBb"])
pw.qlf.Brain_v_Ovary_pBb.TT <- topTags(pw.qlf.Brain_v_Ovary_pBb,p.value = 1,n=100000)
pw.qlf.Brain_v_Ovary_pBb.TT.sig <- topTags(pw.qlf.Brain_v_Ovary_pBb,p.value = 0.01,n=100000)$table
pw.qlf.Brain_v_Ovary_pBb.TT.names <- row.names(pw.qlf.Brain_v_Ovary_pBb.TT$table)
pw.qlf.Brain_v_Ovary_pbb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_v_Ovary_pbb"])
pw.qlf.Brain_v_Ovary_pbb.TT <- topTags(pw.qlf.Brain_v_Ovary_pbb,p.value = 1,n=100000)
pw.qlf.Brain_v_Ovary_pbb.TT.sig <- topTags(pw.qlf.Brain_v_Ovary_pbb,p.value = 0.01,n=100000)$table
pw.qlf.Brain_v_Ovary_pbb.TT.names <- row.names(pw.qlf.Brain_v_Ovary_pbb.TT$table)
pw.qlf.Brain_BB_v_bb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_BB_v_bb"])
pw.qlf.Brain_BB_v_bb.TT <- topTags(pw.qlf.Brain_BB_v_bb,p.value = 1,n=100000)
pw.qlf.Brain_BB_v_bb.TT.sig <- topTags(pw.qlf.Brain_BB_v_bb,p.value = 0.01,n=100000)$table
pw.qlf.Brain_BB_v_bb.TT.names <- row.names(pw.qlf.Brain_BB_v_bb.TT$table)
pw.qlf.Ovary_BB_v_bb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Ovary_BB_v_bb"])
pw.qlf.Ovary_BB_v_bb.TT <- topTags(pw.qlf.Ovary_BB_v_bb,p.value = 1,n=100000)
pw.qlf.Ovary_BB_v_bb.TT.sig <- topTags(pw.qlf.Ovary_BB_v_bb,p.value = 0.01,n=100000)$table
pw.qlf.Ovary_BB_v_bb.TT.names <- row.names(pw.qlf.Ovary_BB_v_bb.TT$table)
pw.qlf.Ovary_mBB_v_pBb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Ovary_mBB_v_pBb"])
pw.qlf.Ovary_mBB_v_pBb.TT <- topTags(pw.qlf.Ovary_mBB_v_pBb,p.value = 1,n=100000)
pw.qlf.Ovary_mBB_v_pBb.TT.sig <- topTags(pw.qlf.Ovary_mBB_v_pBb,p.value = 0.01,n=100000)$table
pw.qlf.Ovary_mBB_v_pBb.TT.names <- row.names(pw.qlf.Ovary_mBB_v_pBb.TT$table)
pw.qlf.Ovary_mBB_v_pbb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Ovary_mBB_v_pbb"])
pw.qlf.Ovary_mBB_v_pbb.TT <- topTags(pw.qlf.Ovary_mBB_v_pbb,p.value = 1,n=100000)
pw.qlf.Ovary_mBB_v_pbb.TT.sig <- topTags(pw.qlf.Ovary_mBB_v_pbb,p.value = 0.01,n=100000)$table
pw.qlf.Ovary_mBB_v_pbb.TT.names <- row.names(pw.qlf.Ovary_mBB_v_pbb.TT$table)
pw.qlf.Brain_mBB_v_pBb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_mBB_v_pBb"])
pw.qlf.Brain_mBB_v_pBb.TT <- topTags(pw.qlf.Brain_mBB_v_pBb,p.value = 1,n=100000)
pw.qlf.Brain_mBB_v_pBb.TT.sig <- topTags(pw.qlf.Brain_mBB_v_pBb,p.value = 0.01,n=100000)$table
pw.qlf.Brain_mBB_v_pBb.TT.names <- row.names(pw.qlf.Brain_mBB_v_pBb.TT$table)
pw.qlf.Brain_mBB_v_pbb <- glmQLFTest(fit_pw, contrast=my.contrasts[,"Brain_mBB_v_pbb"])
pw.qlf.Brain_mBB_v_pbb.TT <- topTags(pw.qlf.Brain_mBB_v_pbb,p.value = 1,n=100000)
pw.qlf.Brain_mBB_v_pbb.TT.sig <- topTags(pw.qlf.Brain_mBB_v_pbb,p.value = 0.01,n=100000)$table
pw.qlf.Brain_mBB_v_pbb.TT.names <- row.names(pw.qlf.Brain_mBB_v_pbb.TT$table)
## Generate Volcano Plots for the Pairiwse Comparisons
# Pairwise, genotype-specific comparisons (Figure S1,A-D)
make_volcano(pw.qlf.Brain_BB_v_Bb,"Brain SB/SB vs. SB/Sb")
## [1] 36
## [1] 1
## [1] 0
## [1] 5
make_volcano(pw.qlf.Brain_BB_v_bb,"Brain SB/SB vs. Sb/Sb")
## [1] 68
## [1] 6
## [1] 3
## [1] 67
## Warning: Removed 3 rows containing missing values (geom_point).
make_volcano(pw.qlf.Ovary_BB_v_Bb,"Ovary SB/SB vs. SB/Sb")
## [1] 40
## [1] 2
## [1] 1
## [1] 0
## Warning: Removed 1 rows containing missing values (geom_point).
make_volcano(pw.qlf.Ovary_BB_v_bb,"Ovary SB/SB vs. Sb/Sb")
## [1] 179
## [1] 20
## [1] 13
## [1] 28
## Warning: Removed 1 rows containing missing values (geom_point).
# Pairwise, social form comparisons (Embedded within Figure 3B,C)
make_volcano(pw.qlf.Brain_M_v_P_BB,"Brain Monogyne SB/SB vs. Polygyne SB/SB")
## [1] 13
## [1] 3
## [1] 1
## [1] 5
make_volcano(pw.qlf.Ovary_M_v_P_BB,"Ovary Monogyne SB/SB vs. Polygyne SB/SB")
## [1] 0
## [1] 0
## [1] 0
## [1] 0
# Pairwise, combinatorial-effect comparisons (Embedded within Figure 3B,C)
make_volcano(pw.qlf.Brain_mBB_v_pBb,"Brain Monogyne SB/SB vs. Polygyne SB/Sb")
## [1] 30
## [1] 0
## [1] 2
## [1] 4
## Warning: Removed 1 rows containing missing values (geom_point).
make_volcano(pw.qlf.Ovary_mBB_v_pBb,"Ovary Monogyne SB/SB vs. Polygyne SB/Sb")
## [1] 175
## [1] 14
## [1] 9
## [1] 7
## Warning: Removed 1 rows containing missing values (geom_point).
## Generate UpSetR plot of Pairwise comparisons (Figure 2C)
PW_set <- list(Brain_pBB_pBb=row.names(pw.qlf.Brain_BB_v_Bb.TT.sig),
Brain_pBB_pbb=row.names(pw.qlf.Brain_BB_v_bb.TT.sig),
Ovary_pBB_pBb=row.names(pw.qlf.Ovary_BB_v_Bb.TT.sig),
Ovary_pBB_pbb=row.names(pw.qlf.Ovary_BB_v_bb.TT.sig)
)
upset(fromList(PW_set),nsets=4,order.by = "freq")
## Generate Social Form Euler Plots (Figure 3B,C)
Tissue_set <- list(
Brain_mBBvpBB=row.names(pw.qlf.Brain_M_v_P_BB.TT.sig),
Ovary_mBBvpBB=row.names(pw.qlf.Ovary_M_v_P_BB.TT.sig),
Brain_pBBvpBb=row.names(pw.qlf.Brain_BB_v_Bb.TT.sig),
Ovary_pBBvpBb=row.names(pw.qlf.Ovary_BB_v_Bb.TT.sig),
Brain_mBBvpBb=row.names(pw.qlf.Brain_mBB_v_pBb.TT.sig),
Ovary_mBBvpBb=row.names(pw.qlf.Ovary_mBB_v_pBb.TT.sig)
)
brain_euler <- plot(euler(Tissue_set[c(1,3,5)], shape = "ellipse"), quantities = TRUE)
brain_euler
ovary_euler <- plot(euler(Tissue_set[c(2,4,6)], shape = "ellipse"), quantities = TRUE)
## Warning in colSums(id & !empty) == 0 | merged_sets: longer object length is
## not a multiple of shorter object length
ovary_euler
## Format model matrix
Colony_ID <- factor(adv_group$Colony_ID)
Tissue <- factor(adv_group$Tissue)
Social.Form <- factor(adv_group$Social.Form)
Genotype_temp <- factor(adv_group$Genotype)
Genotype2 <- relevel(Genotype_temp, ref = "BB")
## Describe the Experiment Design and define Contrasts
glm_design <- model.matrix(~0 + Colony_ID + Social.Form + Tissue * Genotype2,
data = adv_group)
colnames(glm_design) <- c("Colony", "Monogyne", "Polygyne", "TissueOvary", "Genotypebb",
"GenotypeBb", "Tissuebb", "TissueBb")
y_glm <- DGEList(x, samples = glm_design)
## Filter out low count genes
y_glm <- y_glm[(rowSums(cpm(y_glm) > 10/9) >= 8), , keep.lib.sizes = FALSE] ## Based on the smallest number of aligned reads in our libraries as per edgeR documentation
### Normalize samples by library size
y_glm <- calcNormFactors(y_glm)
Background.genes <- row.names(y_glm$counts)
## Begin Computing Differential Expression
y_glm <- estimateDisp(y_glm, glm_design)
y_glm <- estimateGLMCommonDisp(y_glm, glm_design)
y_glm <- estimateGLMTrendedDisp(y_glm, glm_design)
y_glm <- estimateGLMTagwiseDisp(y_glm, glm_design)
## QLM Fit and DE analysis
fit_glm <- glmQLFit(y_glm, glm_design)
## Compute Differential expression based on number of supergene alleles DE
## Analysis for Paper
glm.QLF.BBvBb <- glmQLFTest(fit_glm, coef = "GenotypeBb")
glm.QLF.BBvBb_TT <- topTags(glm.QLF.BBvBb, n = Inf)
glm.QLF.BBvBb_TT_sig <- topTags(glm.QLF.BBvBb_TT, p.value = 0.01, n = Inf)$table
glm.QLF.BBvbb <- glmQLFTest(fit_glm, coef = "Genotypebb")
glm.QLF.BBvbb_TT <- topTags(glm.QLF.BBvbb, n = Inf)
glm.QLF.BBvbb_TT_sig <- topTags(glm.QLF.BBvbb_TT, p.value = 0.01, n = Inf)$table
glm.QLF.all <- glmQLFTest(fit_glm, coef = 5:6)
glm.QLF.all_TT <- topTags(glm.QLF.all, n = Inf)
glm.QLF.all_TT_sig <- topTags(glm.QLF.all_TT, p.value = 0.01, n = Inf)$table
## Compute differential expression based on social form
con.SF <- makeContrasts(Monogyne - Polygyne, levels = glm_design)
glm.QLF.SF <- glmQLFTest(fit_glm, contrast = con.SF)
glm.QLF.SF_TT <- topTags(glm.QLF.SF, n = Inf)
glm.QLF.SF_TT_sig <- topTags(glm.QLF.SF_TT, p.value = 0.01, n = Inf)$table
## Visualizations ## Note here that due to the way the contrasts are set up,
## the directionality of the volcano plots is flipped. This was rectified in
## the manuscript. BB vs. Bb Volcano Plot (Figure 2A)
make_volcano(glm.QLF.BBvBb, "GLM BB vs. Bb")
## [1] 1
## [1] 1
## [1] 1
## [1] 47
# BB vs. bb Volcano Plot (Figure 2B)
make_volcano(glm.QLF.BBvbb, "GLM BB vs. bb")
## [1] 35
## [1] 2
## [1] 9
## [1] 87
# Monogyne vs Polygyne Volcano Plot (Figure 3A)
make_volcano(glm.QLF.SF, "GLM Monogyne vs. Polygyne")
## [1] 0
## [1] 0
## [1] 0
## [1] 0
## GO Term Enrichment Analysis
generate_GO_table(pw.qlf.Brain_BB_v_Bb, "Brain_BBvBL")
## NULL
generate_GO_table(pw.qlf.Brain_BB_v_bb, "Brain_BBvLL")
## NULL
generate_GO_table(pw.qlf.Ovary_BB_v_Bb, "Ovary_BBvBL")
## NULL
generate_GO_table(pw.qlf.Ovary_BB_v_bb, "Ovary_BBvLL")
## NULL
generate_GO_table(glm.QLF.BBvBb, "GLM_BBvBL")
## NULL
generate_GO_table(glm.QLF.BBvbb, "GLM_BBvLL")
## NULL
## Add Tags for Candidate Genes (These genes were derived from overlapping the variouw differential expression analyses)
load(file="~/Desktop/Sinv_Analyses/Candidate_gene_names.RData")
Candidate_Genes <- c(Naming_Frame_sub$Row.names,"gene1476")
temp.sig1 <- Sinv_annot_lg[((Sinv_annot_lg$X4 %in% Candidate_Genes)&(grepl("lg",Sinv_annot_lg$X1))),]
temp.sig1 <- temp.sig1[complete.cases(temp.sig1), ]
row.names(temp.sig1) <- temp.sig1$X4
## Warning: Setting row names on a tibble is deprecated.
temp.sig1 <- convert_LOC(temp.sig1)
## Warning: package 'rtracklayer' was built under R version 3.5.2
## Warning: package 'rentrez' was built under R version 3.5.2
## Appended gene descriptions that were not included due to an upstream error
temp.sig1[5,"description"] <- "dopamine receptor 1 "
temp.sig1[10,"description"] <- "pheromone-binding protein Gp-9-like"
temp.sig1[11,"description"] <- "NADH dehydrogenase [ubiquinone] 1 alpha subcomplex subunit 9, mitochondrial-like"
temp.sig1[14,"description"] <- "putative nuclease HARBI1"
DEG_GR1 <- makeGRangesFromDataFrame(temp.sig1, seqnames.field = "X1",start.field = "X2",end.field = "X3",keep.extra.columns = TRUE)
DEG_GR1$labels <- DEG_GR1$description
Chr_sizes_GR <- toGRanges("~/Desktop/Sinv_Analyses/lg_size.bed")
gene_bed_GR <- toGRanges("~/Desktop/Sinv_Analyses/NCBI_genome/Genetic_Mapping/NCBI_lgonly_gene_v4.bed")
supergene_GR <- GRanges(seqnames = "lg16",ranges=IRanges(7311525,18123987)) ## Need to check points!!!
pp <- getDefaultPlotParams(plot.type = 4)
pp$data1inmargin <- 5
pp$rightmargin <- 0.2
kp <- plotKaryotype(genome = Chr_sizes_GR, plot.type=4, ideogram.plotter = NULL, labels.plotter = NULL,plot.params = pp)
kpAddCytobandsAsLine(kp,lwd = 30)
kpAddChromosomeNames(kp, srt=90)
core_bb_windows <- generate_heatmap_frame(row.names(glm.QLF.BBvbb_TT_sig))
core_Bb_windows <- generate_heatmap_frame(row.names(glm.QLF.BBvBb_TT_sig))
Brain_Bb_windows <- generate_heatmap_frame(row.names(pw.qlf.Brain_BB_v_Bb.TT.sig))
Brain_bb_windows <- generate_heatmap_frame(row.names(pw.qlf.Brain_BB_v_bb.TT.sig))
Ovary_Bb_windows <- generate_heatmap_frame(row.names(pw.qlf.Ovary_BB_v_Bb.TT.sig))
Ovary_bb_windows <- generate_heatmap_frame(row.names(pw.qlf.Ovary_BB_v_bb.TT.sig))
## Add DEG density Heatmap
colfunc <- colorRampPalette(c("white", "blue"))
kpHeatmap(kp,data=core_bb_windows,r0=0.11,r1=0.15,colors = colfunc(5),ymin=0,ymax=20)
kpHeatmap(kp,data=core_Bb_windows,r0=0.16,r1=0.2,colors = colfunc(5),ymin=0,ymax=20)
kpHeatmap(kp,data=Brain_bb_windows,r0=0.21,r1=0.25,colors = colfunc(5),ymin=0,ymax=20)
kpHeatmap(kp,data=Brain_Bb_windows,r0=0.26,r1=0.3,colors = colfunc(5),ymin=0,ymax=20)
kpHeatmap(kp,data=Ovary_bb_windows,r0=0.31,r1=0.35,colors = colfunc(5),ymin=0,ymax=20)
kpHeatmap(kp,data=Ovary_Bb_windows,r0=0.36,r1=0.4,colors = colfunc(5),ymin=0,ymax=20)
## Add supergene Marker
kpPlotRegions(kp,data = supergene_GR,col=adjustcolor("darkgreen",alpha.f = 0.5), r0=0.05,r1=0.1)
kpPlotMarkers(kp, DEG_GR1,labels = DEG_GR1$description,r0 = 0.41,r1=0.5,ignore.chromosome.ends = TRUE,max.iter = 10000,label.dist = -0.001,clipping = TRUE,marker.parts = c(0.1,0.8,0.1))
## Determine if there is a statistical overrepresentation of DEGs within the supergene and if it is directional.
## Check for overlap in supergene and DEGs for each pairwise comparison
check_supergene_presence_bias(glm.QLF.BBvBb_TT$table)
## [1] 17 354 17 8139
## [1] -2555
## Warning in chisq.test(matrix(out[1:4], nrow = 2), correct = FALSE): Chi-
## squared approximation may be incorrect
## [,1] [,2]
## [1,] 17 17
## [2,] 354 8139
## [,1] [,2]
## [1,] 1.479301 32.5207
## [2,] 369.520699 8123.4793
## [1] 4.633262e-39
##
## Fisher's Exact Test for Count Data
##
## data: matrix(out[1:4], nrow = 2)
## p-value = 5.933e-15
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 10.91879 48.26582
## sample estimates:
## odds ratio
## 22.96189
## NULL
check_supergene_presence_bias(glm.QLF.BBvbb_TT$table)
## [1] 43 328 38 8118
## [1] -2555
## Warning in chisq.test(matrix(out[1:4], nrow = 2), correct = FALSE): Chi-
## squared approximation may be incorrect
## [,1] [,2]
## [1,] 43 38
## [2,] 328 8118
## [,1] [,2]
## [1,] 3.524217 77.47578
## [2,] 367.475783 8078.52422
## [1] 1.651845e-103
##
## Fisher's Exact Test for Count Data
##
## data: matrix(out[1:4], nrow = 2)
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 17.39221 45.17243
## sample estimates:
## odds ratio
## 27.9656
## NULL
check_supergene_presence_bias(pw.qlf.Brain_BB_v_Bb.TT$table)
## [1] 14 357 14 8142
## [1] -2555
## Warning in chisq.test(matrix(out[1:4], nrow = 2), correct = FALSE): Chi-
## squared approximation may be incorrect
## [,1] [,2]
## [1,] 14 14
## [2,] 357 8142
## [,1] [,2]
## [1,] 1.218248 26.78175
## [2,] 369.781752 8129.21825
## [1] 1.902539e-32
##
## Fisher's Exact Test for Count Data
##
## data: matrix(out[1:4], nrow = 2)
## p-value = 1.576e-12
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 9.984269 51.955997
## sample estimates:
## odds ratio
## 22.77735
## NULL
check_supergene_presence_bias(pw.qlf.Brain_BB_v_bb.TT$table)
## [1] 38 333 58 8098
## [1] -2555
## Warning in chisq.test(matrix(out[1:4], nrow = 2), correct = FALSE): Chi-
## squared approximation may be incorrect
## [,1] [,2]
## [1,] 38 58
## [2,] 333 8098
## [,1] [,2]
## [1,] 4.17685 91.82315
## [2,] 366.82315 8064.17685
## [1] 6.043095e-65
##
## Fisher's Exact Test for Count Data
##
## data: matrix(out[1:4], nrow = 2)
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 10.12979 24.77462
## sample estimates:
## odds ratio
## 15.91616
## NULL
check_supergene_presence_bias(pw.qlf.Ovary_BB_v_Bb.TT$table)
## [1] 8 363 16 8140
## [1] -2555
## Warning in chisq.test(matrix(out[1:4], nrow = 2), correct = FALSE): Chi-
## squared approximation may be incorrect
## [,1] [,2]
## [1,] 8 16
## [2,] 363 8140
## [,1] [,2]
## [1,] 1.044213 22.95579
## [2,] 369.955787 8133.04421
## [1] 3.172793e-12
##
## Fisher's Exact Test for Count Data
##
## data: matrix(out[1:4], nrow = 2)
## p-value = 4.744e-06
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 4.122905 27.963466
## sample estimates:
## odds ratio
## 11.20021
## NULL
check_supergene_presence_bias(pw.qlf.Ovary_BB_v_bb.TT$table)
## [1] 40 331 117 8039
## [1] -2555
## [,1] [,2]
## [1,] 40 117
## [2,] 331 8039
## [,1] [,2]
## [1,] 6.83089 150.1691
## [2,] 364.16911 8005.8309
## [1] 3.400547e-39
##
## Fisher's Exact Test for Count Data
##
## data: matrix(out[1:4], nrow = 2)
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 5.549275 12.199496
## sample estimates:
## odds ratio
## 8.298817
## NULL
## Generate adjusted logFC versions of the results tables so pw and glm tables are comparable.
glm.QLF.BBvBb_adj <- glm.QLF.BBvBb_TT_sig
glm.QLF.BBvBb_adj$logFC <- -1*glm.QLF.BBvBb_TT_sig$logFC
glm.QLF.BBvbb_adj <- glm.QLF.BBvbb_TT_sig
glm.QLF.BBvbb_adj$logFC <- -1*glm.QLF.BBvbb_TT_sig$logFC
check_supergene_dir_bias(glm.QLF.BBvBb_adj)
## [1] 1 16 1 16
## [1] -16
## Warning in chisq.test(matrix(out[1:4], nrow = 2), correct = FALSE): Chi-
## squared approximation may be incorrect
## [,1] [,2]
## [1,] 1 1
## [2,] 16 16
## [,1] [,2]
## [1,] 1 1
## [2,] 16 16
## [1] 1
##
## Fisher's Exact Test for Count Data
##
## data: matrix(out[1:4], nrow = 2)
## p-value = 1
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.01200665 83.28721289
## sample estimates:
## odds ratio
## 1
## NULL
check_supergene_dir_bias(glm.QLF.BBvbb_adj)
## [1] 16 27 13 25
## [1] -52
## [,1] [,2]
## [1,] 16 13
## [2,] 27 25
## [,1] [,2]
## [1,] 15.39506 13.60494
## [2,] 27.60494 24.39506
## [1] 0.7787573
##
## Fisher's Exact Test for Count Data
##
## data: matrix(out[1:4], nrow = 2)
## p-value = 0.8197
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.4165648 3.1443264
## sample estimates:
## odds ratio
## 1.137764
## NULL
check_supergene_dir_bias(pw.qlf.Brain_BB_v_Bb.TT.sig)
## [1] 1 13 4 10
## [1] -14
## Warning in chisq.test(matrix(out[1:4], nrow = 2), correct = FALSE): Chi-
## squared approximation may be incorrect
## [,1] [,2]
## [1,] 1 4
## [2,] 13 10
## [,1] [,2]
## [1,] 2.5 2.5
## [2,] 11.5 11.5
## [1] 0.1387917
##
## Fisher's Exact Test for Count Data
##
## data: matrix(out[1:4], nrow = 2)
## p-value = 0.3259
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.003637868 2.484508917
## sample estimates:
## odds ratio
## 0.2033741
## NULL
check_supergene_dir_bias(pw.qlf.Brain_BB_v_bb.TT.sig)
## [1] 17 21 38 20
## [1] -48
## [,1] [,2]
## [1,] 17 38
## [2,] 21 20
## [,1] [,2]
## [1,] 21.77083 33.22917
## [2,] 16.22917 24.77083
## [1] 0.04412524
##
## Fisher's Exact Test for Count Data
##
## data: matrix(out[1:4], nrow = 2)
## p-value = 0.05811
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.1693863 1.0681989
## sample estimates:
## odds ratio
## 0.4300187
## NULL
check_supergene_dir_bias(pw.qlf.Ovary_BB_v_Bb.TT.sig)
## [1] 1 7 0 16
## [1] -19
## Warning in chisq.test(matrix(out[1:4], nrow = 2), correct = FALSE): Chi-
## squared approximation may be incorrect
## [,1] [,2]
## [1,] 1 0
## [2,] 7 16
## [,1] [,2]
## [1,] 0.3333333 0.6666667
## [2,] 7.6666667 15.3333333
## [1] 0.1485618
##
## Fisher's Exact Test for Count Data
##
## data: matrix(out[1:4], nrow = 2)
## p-value = 0.3333
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.05128188 Inf
## sample estimates:
## odds ratio
## Inf
## NULL
check_supergene_dir_bias(pw.qlf.Ovary_BB_v_bb.TT.sig)
## [1] 19 21 17 100
## [1] -83
## [,1] [,2]
## [1,] 19 17
## [2,] 21 100
## [,1] [,2]
## [1,] 9.171975 26.82803
## [2,] 30.828025 90.17197
## [1] 1.852027e-05
##
## Fisher's Exact Test for Count Data
##
## data: matrix(out[1:4], nrow = 2)
## p-value = 5.337e-05
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 2.192937 12.846506
## sample estimates:
## odds ratio
## 5.250997
## NULL
## Take the results from above and visualize it as a stacked bar plot (Figure S2)
DE_dir_CT <- read.delim("~/Desktop/Sinv_Analyses/DE_dir_CT2.tsv")
DE_dir_CT$Comparison_f = factor(DE_dir_CT$Comparison, levels=c('Full BB vs. Bb','Brain BB vs. Bb','Ovary BB vs. Bb','Full BB vs. bb','Brain BB vs. bb','Ovary BB vs. bb'))
ggplot(data=DE_dir_CT, aes(x=Genome.Position, y=DEG_Ratio, fill=DE.Direction)) +
geom_bar(stat="identity") +
facet_grid(~Comparison_f) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
## Load SNP-level files in the individual GATK ASE Read Counter files (for all 16 Bb libraries)
setwd("~/Desktop/Sinv_Analyses/GATK_ASERC_mod/")
temp = list.files(pattern="*counts.csv")
temp2 <- sapply(strsplit(temp, split='_', fixed=TRUE), function(x) (x[1]))
myfiles = lapply(temp, read.delim)
# Note: Each File is saved as myfiles[[x]] as a data.frame
## merge first two columns into identifier column
for (i in 1:16){
myfiles[[i]]$ID <- paste(myfiles[[i]]$contig,myfiles[[i]]$position,myfiles[[i]]$altAllele,sep = "~")
colnames(myfiles[[i]]) <- paste(colnames(myfiles[[i]]), temp2[i], sep = ".")
colnames(myfiles[[i]])[14] <- "ID"
}
## merge all data.frames by ID
merged <- Reduce(function(...) merge(..., by='ID', all.x=FALSE), myfiles)
## Load gene-level files (made using bedtools intersect) in the individual GATK ASE Read Counter files (for all 16 Bb libraries)
temp_gene = list.files(pattern="*genes.tsv")
temp2_gene <- sapply(strsplit(temp_gene, split='_', fixed=TRUE), function(x) (x[1]))
myfiles_gene = lapply(temp_gene, read.delim)
# Note: Each File is saved as myfiles[[x]] as a data.frame
## merge first two columns into identifier column
for (i in 1:16){
colnames(myfiles_gene[[i]]) <- paste(colnames(myfiles_gene[[i]]), temp2_gene[i], sep = ".")
colnames(myfiles_gene[[i]])[1] <- "gene"
}
## merge all data.frames by ID
merged_genes <- Reduce(function(...) merge(..., by='gene', all.x=FALSE), myfiles_gene)
pull_cols_genes <- grepl("refCount|altCount",colnames(merged_genes))
row.names(merged_genes) <- merged_genes$gene
x_genes <- merged_genes[,pull_cols_genes]
## Extract the specific columns that I want (refCount and altCount)
pull_cols <- grepl("refCount|altCount",colnames(merged))
row.names(merged) <- merged$ID
x <- merged[,pull_cols]
## Load in model matrix (Modified to analyze ASE)
adv_group_ASE <- read.delim("/Users/samarsenault/Desktop/Sinv_Analyses/Sinv_model_matrix6.tsv",sep = "\t")
head(adv_group_ASE)
## Sample Individual_ID Colony_ID Tissue Social.Form Genotype
## 1 104AB 104A 104 Brain Polygyne Bb
## 2 104AB 104A 104 Brain Polygyne Bb
## 3 104AO 104A 104 Ovary Polygyne Bb
## 4 104AO 104A 104 Ovary Polygyne Bb
## 5 107AB 107A 107 Brain Polygyne Bb
## 6 107AB 107A 107 Brain Polygyne Bb
## Alt_ID Allele
## 1 Brain.Polygyne.Bb Reference
## 2 Brain.Polygyne.Bb Alternative
## 3 Ovary.Polygyne.Bb Reference
## 4 Ovary.Polygyne.Bb Alternative
## 5 Brain.Polygyne.Bb Reference
## 6 Brain.Polygyne.Bb Alternative
Colony_ID <- factor(adv_group_ASE$Colony_ID)
Tissue <- factor(adv_group_ASE$Tissue)
Allele <- factor(adv_group_ASE$Allele)
## Describe the Experiment Design and define Contrasts
ASE_design <- model.matrix(~0+Colony_ID+Tissue*Allele,data=adv_group_ASE)
y_ASE <- DGEList(x,samples=ASE_design)
y_ASE <- estimateDisp(y_ASE,ASE_design)
y_ASE <- estimateGLMCommonDisp(y_ASE, ASE_design)
y_ASE <- estimateGLMTrendedDisp(y_ASE, ASE_design)
y_ASE <- estimateGLMTagwiseDisp(y_ASE, ASE_design)
## QLM Fit and DE analysis
fit_ASE <- glmQLFit(y_ASE,ASE_design)
## DE Analysis
QLF.ASE <- glmQLFTest(fit_ASE,coef="AlleleReference")
QLF.ASE_TT <- topTags(QLF.ASE,n=Inf)
QLF.ASE_TT_sig <- topTags(QLF.ASE_TT,p.value = 0.01,n=Inf)$table
QLF.ASE_TT_sig$SNPs <- row.names(QLF.ASE_TT_sig)
make_volcano(QLF.ASE,"SB vs. Sb in SB/Sb individuals")
## [1] 78
## [1] 13
## [1] 6
## [1] 61
## Warning: Removed 1 rows containing missing values (geom_point).
## Note: Positive values in volcano plot indicate higher expression in reference.
## Describe the Experiment Design and define Contrasts
y_gene<- DGEList(x_genes,samples=ASE_design)
y_gene <- estimateDisp(y_gene,ASE_design)
y_gene <- estimateGLMCommonDisp(y_gene, ASE_design)
y_gene <- estimateGLMTrendedDisp(y_gene, ASE_design)
y_gene <- estimateGLMTagwiseDisp(y_gene, ASE_design)
## QLM Fit and DE analysis
fit_gene <- glmQLFit(y_gene,ASE_design)
## DE Analysis
QLF.ASE.gene <- glmQLFTest(fit_gene,coef="AlleleReference")
QLF.ASE.gene_TT <- topTags(QLF.ASE.gene,n=Inf)
QLF.ASE.gene_TT_sig <- topTags(QLF.ASE.gene_TT,p.value = 0.01,n=Inf)$table
QLF.ASE.gene_TT_all <- topTags(QLF.ASE.gene_TT,p.value = 1,n=Inf)$table
## Generate ASE Gene Volcano plot (Figure 4A)
make_volcano(QLF.ASE.gene,"Alternatively Spliced Genes")
## [1] 10
## [1] 5
## [1] 4
## [1] 9
## Note: Positive values in volcano plot indicate higher expression in reference.
## Break the topTags file into a better data.frame
QLF.ASE_TT_df <- QLF.ASE_TT$table
QLF.ASE_TT_df$chr <- sapply(strsplit(row.names(QLF.ASE_TT_df), split='~', fixed=TRUE), function(x) (x[1]))
QLF.ASE_TT_df$start <- sapply(strsplit(row.names(QLF.ASE_TT_df), split='~', fixed=TRUE), function(x) (x[2]))
QLF.ASE_TT_df$end <- sapply(strsplit(row.names(QLF.ASE_TT_df), split='~', fixed=TRUE), function(x) (x[2]))
## merge with linkage map.
genetic_map <- read.delim("~/Desktop/Sinv_Analyses/NCBI_genome/Genetic_Mapping/genetic_map.txt", comment.char="#")
head (genetic_map)
## scaffold start end orientation lg lg_position partial
## 1 NW_011798938.1 1 955721 uncertain lg1 1 FALSE
## 2 NW_011794913.1 1 1181834 + lg1 955822 FALSE
## 3 NW_011795398.1 1 1293734 - lg1 2137756 FALSE
## 4 NW_011801858.1 1 1483336 - lg1 3431590 FALSE
## 5 NW_011798146.1 1 451599 - lg1 4915026 FALSE
## 6 NW_011800891.1 1 3265217 + lg1 5366725 FALSE
## Map the SNPs (based on gnG genome) to the linkage map.
QLF.ASE_TT_df_merge <- merge(QLF.ASE_TT_df,genetic_map,by.x="chr",by.y="scaffold")
for (i in 1:nrow(QLF.ASE_TT_df_merge)){
if (QLF.ASE_TT_df_merge$orientation[i]=="-"){
QLF.ASE_TT_df_merge$newstart[i] <- QLF.ASE_TT_df_merge$lg_position[i] + (QLF.ASE_TT_df_merge$end.y[i]-as.numeric(QLF.ASE_TT_df_merge$start.x[i]))
} else {
QLF.ASE_TT_df_merge$newstart[i] <- QLF.ASE_TT_df_merge$lg_position[i] + as.numeric(QLF.ASE_TT_df_merge$start.x[i]) - as.numeric(QLF.ASE_TT_df_merge$start.y[i])
}
}
QLF.ASE_TT_df_merge$newend <- QLF.ASE_TT_df_merge$newstart
QLF.ASE_TT_df_merge <- QLF.ASE_TT_df_merge[(QLF.ASE_TT_df_merge$FDR<=0.01),]
## Add in LOC IDs for the ASE gene frames
QLF.ASE.gene_TT_sig_LOC <- convert_LOC(QLF.ASE.gene_TT_sig)
QLF.ASE.gene_TT_all_LOC <- convert_LOC(QLF.ASE.gene_TT_all)
## Make ASE DEG Upset Plot (Figure 4B)
##Filter Gene name lists down by which genes were analyzed in ASE and DE
ASE_background <- QLF.ASE.gene_TT_all_LOC$Row.names
Upset_backgroud <- intersect(Background.genes,ASE_background)
ASE_genes_b <- QLF.ASE.gene_TT_all_LOC[(QLF.ASE.gene_TT_all_LOC$FDR <= 0.01)&(QLF.ASE.gene_TT_all_LOC$logFC<0),c("Row.names")]
ASE_genes_B <- QLF.ASE.gene_TT_all_LOC[(QLF.ASE.gene_TT_all_LOC$FDR <= 0.01)&(QLF.ASE.gene_TT_all_LOC$logFC>0),c("Row.names")]
DE_genes_b <- row.names(glm.QLF.BBvBb_TT_sig[(glm.QLF.BBvBb_TT_sig$logFC>0),])
DE_genes_B <- row.names(glm.QLF.BBvBb_TT_sig[(glm.QLF.BBvBb_TT_sig$logFC<0),])
ASE_genes_b <- intersect(ASE_genes_b,Upset_backgroud)
ASE_genes_B <- intersect(ASE_genes_B,Upset_backgroud)
DE_genes_b <- intersect(DE_genes_b,Upset_backgroud)
DE_genes_B <- intersect(DE_genes_B,Upset_backgroud)
Overlap <- list(ASE_b=ASE_genes_b,
ASE_B=ASE_genes_B,
DE_SBSb=DE_genes_b,
DE_SBSB=DE_genes_B
)
upset(fromList(Overlap),nsets=4,order.by = "freq",text.scale = 1.4)
## Extract overlap information
b_overlap <- ASE_genes_b[(ASE_genes_b %in% DE_genes_b)]
b_overlap_data <- QLF.ASE.gene_TT_all_LOC[(QLF.ASE.gene_TT_all_LOC$Row.names %in% b_overlap),]
B_overlap <- ASE_genes_B[ASE_genes_B %in% DE_genes_B]
QLF.ASE.gene_TT_all_LOC[(QLF.ASE.gene_TT_all_LOC$Row.names %in% B_overlap),]
## Row.names logFC logCPM F PValue FDR
## 61 gene11836 1.150108 11.61617 46.38109 1.162715e-07 3.226534e-06
## Name
## 61 LOC105203076
## description
## 61 nucleolar protein 16 [Source:RefSeq mRNA;Acc:XM_011171831.1]
nonB_overlap <- ASE_genes_B[!(ASE_genes_B %in% DE_genes_B)]
dosage_comp_B <- QLF.ASE.gene_TT_all_LOC[(QLF.ASE.gene_TT_all_LOC$Row.names %in% nonB_overlap),]
nonb_overlap <- ASE_genes_b[!(ASE_genes_b %in% DE_genes_b)]
dosage_comp_b <- QLF.ASE.gene_TT_all_LOC[(QLF.ASE.gene_TT_all_LOC$Row.names %in% nonb_overlap),]
## Set some plotting parameters for the karyoploteR
plot.params <- getDefaultPlotParams(plot.type = 1)
plot.params$data1height=300
plot.params$data1outmargin = 20
plot.params$data1inmargin = 5
plot.params$ideogramheight = 10
col.over <- "#FFBD07AA" #Yellow
col.under <- "#00A6EDAA" #Blue
col.insig <- "#7F7F7F"
## Begin Creating the tracks for Figure 5
kp <- plotKaryotype(genome = Chr_sizes_GR, cytobands = gene_bed_GR,chromosomes = c("lg16"),plot.params = plot.params)
## Warning in ideogram.plotter(kp, ...): No 'gieStain' column found in
## cytobands. Using 'gpos50' (gray) for all of them
kpAddBaseNumbers(kp,tick.dist = 1000000,add.units = TRUE)
## Add Supergene
kpPlotRegions(kp,data = supergene_GR,col="darkgreen", r0=0,r1=0.01)
## Add in various inverions from Huang et al. (from 0 to 0.1)
gr <- GRanges(seqnames = "lg16", strand = c("+", "+", "+"),
ranges = IRanges(start = c(7948943,8793193,18097796), end = c(8793193,18097796,18098299)))
values(gr) <- DataFrame(labels = c("Inversion A","Inversion B","Inversion C"))
kpPlotRegions(kp,data = gr[1],col="purple", r0=0.02,r1=0.03)
kpPlotRegions(kp,data = gr[2],col="orange", r0=0.02,r1=0.03)
kpPlotRegions(kp,data = gr[3],col="red", r0=0.02,r1=0.03)
## Plot Gene density across Lg16
kpPlotDensity(kp, gene_bed_GR, window.size = 500000, col="#ddaacc",r0 = 0.04,r1 = 0.09)
## Core BBvbb
QLF.BBvbb_adj.merge2 <- merge(glm.QLF.BBvbb_TT_sig,Sinv_annot_lg,by.x=0,by.y="X4")
QLF.BBvbb_adj.merge2$logFC <- -1*QLF.BBvbb_adj.merge2$logFC
QLF.BBvbb_adj.merge <- QLF.BBvbb_adj.merge2[QLF.BBvbb_adj.merge2$X1=="lg16",]
DEG_GR <- makeGRangesFromDataFrame(QLF.BBvbb_adj.merge, seqnames.field = "X1",start.field = "X2",end.field = "X3",keep.extra.columns = TRUE)
fc.ymax=ceiling(max(abs(DEG_GR$logFC)))
fc.ymin=-fc.ymax
cex.val <- -log10(DEG_GR$FDR)/15 + 0.7
sign.col <- rep(col.insig, length(DEG_GR))
sign.col[(DEG_GR$logFC<0)&(DEG_GR$FDR<=0.01)] <- col.under
sign.col[(DEG_GR$logFC>0)&(DEG_GR$FDR<=0.01)] <- col.over
kpPoints(kp, data=DEG_GR, y=DEG_GR$logFC, ymax=fc.ymax, ymin=fc.ymin,cex=cex.val, col=sign.col,r0 = 0.14, r1 = 0.29)
kpAxis(kp, ymax=fc.ymax, ymin=fc.ymin,side = 2,cex=0.5,r0 = 0.14, r1 = 0.29)
DEG_density_B <- generate_heatmap_frame(QLF.BBvbb_adj.merge[QLF.BBvbb_adj.merge$logFC>0,]$Row.names)
DEG_density_b <- generate_heatmap_frame(QLF.BBvbb_adj.merge[QLF.BBvbb_adj.merge$logFC<0,]$Row.names)
colfunc <- colorRampPalette(c("lightcyan1", "red"))
kpHeatmap(kp,data=DEG_density_B,r0=0.3,r1=0.33,colors = colfunc(5))
kpHeatmap(kp,data=DEG_density_b,r0=0.1+0,r1=0.13,colors = colfunc(5))
## Core BBvBb
QLF.BBvBb_adj.merge2 <- merge(glm.QLF.BBvBb_TT_sig,Sinv_annot_lg,by.x=0,by.y="X4")
QLF.BBvBb_adj.merge <- QLF.BBvBb_adj.merge2[QLF.BBvBb_adj.merge2$X1=="lg16",]
DEG_GR <- makeGRangesFromDataFrame(QLF.BBvBb_adj.merge, seqnames.field = "X1",start.field = "X2",end.field = "X3",keep.extra.columns = TRUE)
fc.ymax=ceiling(max(abs(DEG_GR$logFC)))
fc.ymin=-fc.ymax
cex.val <- -log10(DEG_GR$FDR)/15 + 0.7
sign.col <- rep(col.insig, length(DEG_GR))
sign.col[(DEG_GR$logFC<0)&(DEG_GR$FDR<=0.01)] <- col.under
sign.col[(DEG_GR$logFC>0)&(DEG_GR$FDR<=0.01)] <- col.over
kpPoints(kp, data=DEG_GR, y=DEG_GR$logFC, ymax=fc.ymax, ymin=fc.ymin,cex=cex.val, col=sign.col,r0 = 0.39, r1 = 0.54)
kpAxis(kp, ymax=fc.ymax, ymin=fc.ymin,side = 2,cex=0.5,r0 = 0.38, r1 = 0.54)
DEG_density_B <- generate_heatmap_frame(QLF.BBvBb_adj.merge[QLF.BBvBb_adj.merge$logFC>0,]$Row.names)
DEG_density_b <- generate_heatmap_frame(QLF.BBvBb_adj.merge[QLF.BBvBb_adj.merge$logFC<0,]$Row.names)
colfunc <- colorRampPalette(c("lightcyan1", "red"))
kpHeatmap(kp,data=DEG_density_b,r0=0.35,r1=0.38,colors = colfunc(5))
kpHeatmap(kp,data=DEG_density_B,r0=0.55,r1=0.58,colors = colfunc(5))
## Add in ASE Heatmap and B vs. b dots 0.6-0.8
ASE_GR <- makeGRangesFromDataFrame(QLF.ASE_TT_df_merge, seqnames.field = "lg",start.field = "newstart",end.field = "newend",keep.extra.columns = TRUE)
fc.ymax=ceiling(max(abs(ASE_GR$logFC)))
fc.ymin=-fc.ymax
cex.val <- -log10(ASE_GR$FDR)/15 + 0.7
sign.col <- rep(col.insig, length(ASE_GR))
col.over2 <- rgb(125, 125, 0, max = 255, alpha = 100) #Yellow
col.under2 <- rgb(0, 0, 255, max = 255, alpha = 100) #Blue
sign.col[(ASE_GR$logFC<0)&(ASE_GR$FDR<=0.01)] <- col.under2
sign.col[(ASE_GR$logFC>0)&(ASE_GR$FDR<=0.01)] <- col.over2
kpPoints(kp, data=ASE_GR, y=ASE_GR$logFC, ymax=fc.ymax, ymin=fc.ymin,cex=cex.val, col=sign.col,r0 = 0.64, r1 = 0.79)
kpAxis(kp, ymax=fc.ymax, ymin=fc.ymin,side = 2,cex=0.5,r0 = 0.63, r1 = 0.79)
SNP_density_B <- generate_heatmap_frame_SNP(QLF.ASE_TT_df_merge[(QLF.ASE_TT_df_merge$logFC>0),])
SNP_density_b <- generate_heatmap_frame_SNP(QLF.ASE_TT_df_merge[(QLF.ASE_TT_df_merge$logFC<0),])
colfunc <- colorRampPalette(c("lightcyan1", "red"))
kpHeatmap(kp,data=SNP_density_B,r0=0.80,r1=0.83,colors = colfunc(5))
kpHeatmap(kp,data=SNP_density_b,r0=0.60,r1=0.63,colors = colfunc(5))
## Perform Run's Test on DEGs, Testing for non-uniformity in significance
## Did some merging of data frames using shell scripting and generated an excel spreadsheet of the stats for genes in the supergene.
Supergene_genes <- read_excel("~/Desktop/Sinv_Analyses/Supergene_genes.xlsx",col_types = "guess")
for (i in c(5,6,13:25)){
Supergene_genes[,i] <- as.numeric(unlist(Supergene_genes[,i]))
}
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
runs.test(Supergene_genes$Core_Bb_FDR[!is.na(Supergene_genes$Core_Bb_FDR)],plot=T,threshold = 0.01)
##
## Runs Test
##
## data: Supergene_genes$Core_Bb_FDR[!is.na(Supergene_genes$Core_Bb_FDR)]
## statistic = -1.4578, runs = 31, n1 = 351, n2 = 17, n = 368,
## p-value = 0.1449
## alternative hypothesis: nonrandomness
runs.test(Supergene_genes$Core_bb_FDR[!is.na(Supergene_genes$Core_bb_FDR)],plot=T,threshold = 0.01)
##
## Runs Test
##
## data: Supergene_genes$Core_bb_FDR[!is.na(Supergene_genes$Core_bb_FDR)]
## statistic = -0.4954, runs = 75, n1 = 325, n2 = 43, n = 368,
## p-value = 0.6203
## alternative hypothesis: nonrandomness
## Testing for non-uniformity in direcitonality among DEGs
Bb_DEGs <- Supergene_genes[Supergene_genes$Core_Bb_FDR<=0.01,c("start","Core_Bb_FDR","Core_Bb_logFC")]
Bb_DEGs$binary <- (Bb_DEGs$Core_Bb_logFC > 0)*1
bb_DEGs <- Supergene_genes[Supergene_genes$Core_bb_FDR<=0.01,c("start","Core_bb_FDR","Core_bb_logFC")]
bb_DEGs$binary <- (bb_DEGs$Core_bb_logFC > 0)*1
## Testing for nonuniformity of ASE GENES
ASE_temp <- Supergene_genes[,c("start","ASE_FDR.","ASE_logFC")]
ASE_temp <- ASE_temp[!is.na(ASE_temp[,2]),]
ASE_genes <- ASE_temp[(ASE_temp$ASE_FDR.<=0.01),]
ASE_genes$binary <- (ASE_genes$ASE_logFC > 0)*1
runs.test(ASE_genes$ASE_logFC[9:21],plot=T,threshold = 0) ## This subset is significant
##
## Runs Test
##
## data: ASE_genes$ASE_logFC[9:21]
## statistic = -2.0185, runs = 4, n1 = 6, n2 = 7, n = 13, p-value =
## 0.04354
## alternative hypothesis: nonrandomness
runs.test(ASE_genes$ASE_logFC,plot=T,threshold = 0)
##
## Runs Test
##
## data: ASE_genes$ASE_logFC
## statistic = 0.027658, runs = 15, n1 = 13, n2 = 15, n = 28, p-value
## = 0.9779
## alternative hypothesis: nonrandomness
runs.test(Bb_DEGs$Core_Bb_logFC,plot=T,threshold = 0)
##
## Runs Test
##
## data: Bb_DEGs$Core_Bb_logFC
## statistic = 0.36515, runs = 3, n1 = 16, n2 = 1, n = 17, p-value =
## 0.715
## alternative hypothesis: nonrandomness
runs.test(bb_DEGs$Core_bb_logFC,plot=T,threshold = 0)
##
## Runs Test
##
## data: bb_DEGs$Core_bb_logFC
## statistic = -2.3469, runs = 14, n1 = 27, n2 = 16, n = 43, p-value
## = 0.01893
## alternative hypothesis: nonrandomness
## Runs ASE by SNP
ASE_logFC <- QLF.ASE_TT_df_merge[order(QLF.ASE_TT_df_merge$newend),c("logFC")]
runs.test(ASE_logFC,plot=T,threshold = 0)
##
## Runs Test
##
## data: ASE_logFC
## statistic = -5.7485, runs = 43, n1 = 67, n2 = 91, n = 158, p-value
## = 9.004e-09
## alternative hypothesis: nonrandomness
## Merge with linkage mapped annotation to determine supergene vs. nonsupergene genes
glm.QLF.all_TT_sig_merge <- merge(glm.QLF.all_TT_sig,Sinv_annot_lg,by.x=0,by.y="X4",all.x=TRUE)
supergene_genes <- glm.QLF.all_TT_sig_merge[((glm.QLF.all_TT_sig_merge$X1=="lg16")&(glm.QLF.all_TT_sig_merge$X2>=7311525)&(glm.QLF.all_TT_sig_merge$X3<=18123987)),c("Row.names")]
supergene_genes <- supergene_genes[!is.na(supergene_genes)]
nonsuper_genes <- glm.QLF.all_TT_sig_merge[((grepl(pattern = "lg",x = glm.QLF.all_TT_sig_merge$X1))&(glm.QLF.all_TT_sig_merge$Row.names %!in% supergene_genes)),c("Row.names")]
## Generate Polar Plot with only the genes in the supergene (Figure S3,B,C)
NCBI_supergene_polar <- make_polar_DEG(glm.QLF.all_TT_sig[(row.names(glm.QLF.all_TT_sig) %in% supergene_genes),],15)
NCBI_supergene_polar
## Warning: Removed 1 rows containing missing values (geom_point).
NCBI_nonsupergene_polar <- make_polar_DEG(glm.QLF.all_TT_sig[(row.names(glm.QLF.all_TT_sig) %in% nonsuper_genes),],10)
NCBI_nonsupergene_polar
## Extract the angles of expression using an arctangent and then compare the radial distributions using a watson's two test
nonsuper_angle <- atan2(glm.QLF.all_TT_sig[(row.names(glm.QLF.all_TT_sig) %in% nonsuper_genes),c("logFC.GenotypeBb")],glm.QLF.all_TT_sig[(row.names(glm.QLF.all_TT_sig) %in% nonsuper_genes),c("logFC.Genotypebb")])
supergene_angle <- atan2(glm.QLF.all_TT_sig[(row.names(glm.QLF.all_TT_sig) %in% supergene_genes),c("logFC.GenotypeBb")],glm.QLF.all_TT_sig[(row.names(glm.QLF.all_TT_sig) %in% supergene_genes),c("logFC.Genotypebb")])
watson.two(nonsuper_angle, supergene_angle, alpha=0, plot=TRUE)
##
## Watson's Two-Sample Test of Homogeneity
##
## Test Statistic: 0.2387
## 0.01 < P-value < 0.05
##
## Import the OBP-aligned RNA-seq Data
x1_OBP <- read_delim("~/Desktop/Sinv_Analyses/NCBI_genome/OBP_counts/OBP.all",
"\t", escape_double = FALSE, trim_ws = TRUE)
## Parsed with column specification:
## cols(
## .default = col_double(),
## GeneID = col_character()
## )
## See spec(...) for full column specifications.
## Warning: 52 parsing failures.
## row col expected actual file
## 1 -- 64 columns 65 columns '~/Desktop/Sinv_Analyses/NCBI_genome/OBP_counts/OBP.all'
## 2 -- 64 columns 65 columns '~/Desktop/Sinv_Analyses/NCBI_genome/OBP_counts/OBP.all'
## 3 -- 64 columns 65 columns '~/Desktop/Sinv_Analyses/NCBI_genome/OBP_counts/OBP.all'
## 4 -- 64 columns 65 columns '~/Desktop/Sinv_Analyses/NCBI_genome/OBP_counts/OBP.all'
## 5 -- 64 columns 65 columns '~/Desktop/Sinv_Analyses/NCBI_genome/OBP_counts/OBP.all'
## ... ... .......... .......... ........................................................
## See problems(...) for more details.
x_OBP <- data.frame(x1_OBP[1:52,2:64],row.names = x1_OBP$GeneID[1:52])
## Generate data.frame for pairwise OBP analyses
y_OBP <- DGEList(x_OBP,group = adv_group$Alt_ID)
keep_OBP <- rowSums(cpm(y_OBP)>10/9) >= 8 ## NEED TO EDIT!!!
y_OBP <- y_OBP[keep_OBP, , keep.lib.sizes=FALSE]
## Normalize samples by library size
y_OBP <- calcNormFactors(y_OBP)
obp.Background.genes <- row.names(y_OBP$counts)
## Begin Computing Differential Expression
y_OBP <- estimateDisp(y_OBP,design_pw)
y_OBP <- estimateGLMCommonDisp(y_OBP, design_pw)
y_OBP <- estimateGLMTrendedDisp(y_OBP, design_pw)
y_OBP <- estimateGLMTagwiseDisp(y_OBP, design_pw)
## QLM Fit and DE analysis
## Note: For more conservative estimates, use glmQLFTest vs. glmLRT
fit_OBP <- glmQLFit(y_OBP,design_pw)
obp.qlf.Brain_BB_v_Bb <- glmQLFTest(fit_OBP, contrast=my.contrasts[,"Brain_BB_v_Bb"])
obp.qlf.Brain_BB_v_Bb.TT <- topTags(obp.qlf.Brain_BB_v_Bb,p.value = 1,n=100000)
obp.qlf.Brain_BB_v_Bb.TT.sig <- topTags(obp.qlf.Brain_BB_v_Bb,p.value = 0.01,n=100000)$table
obp.qlf.Brain_BB_v_Bb.TT.names <- row.names(obp.qlf.Brain_BB_v_Bb.TT$table)
obp.qlf.Ovary_BB_v_Bb <- glmQLFTest(fit_OBP, contrast=my.contrasts[,"Ovary_BB_v_Bb"])
obp.qlf.Ovary_BB_v_Bb.TT <- topTags(obp.qlf.Ovary_BB_v_Bb,p.value = 1,n=100000)
obp.qlf.Ovary_BB_v_Bb.TT.sig <- topTags(obp.qlf.Ovary_BB_v_Bb,p.value = 0.01,n=100000)$table
obp.qlf.Ovary_BB_v_Bb.TT.names <- row.names(obp.qlf.Ovary_BB_v_Bb.TT$table)
obp.qlf.Brain_BB_v_bb <- glmQLFTest(fit_OBP, contrast=my.contrasts[,"Brain_BB_v_bb"])
obp.qlf.Brain_BB_v_bb.TT <- topTags(obp.qlf.Brain_BB_v_bb,p.value = 1,n=100000)
obp.qlf.Brain_BB_v_bb.TT.sig <- topTags(obp.qlf.Brain_BB_v_bb,p.value = 0.01,n=100000)$table
obp.qlf.Brain_BB_v_bb.TT.names <- row.names(obp.qlf.Brain_BB_v_bb.TT$table)
obp.qlf.Ovary_BB_v_bb <- glmQLFTest(fit_OBP, contrast=my.contrasts[,"Ovary_BB_v_bb"])
obp.qlf.Ovary_BB_v_bb.TT <- topTags(obp.qlf.Ovary_BB_v_bb,p.value = 1,n=100000)
obp.qlf.Ovary_BB_v_bb.TT.sig <- topTags(obp.qlf.Ovary_BB_v_bb,p.value = 0.01,n=100000)$table
obp.qlf.Ovary_BB_v_bb.TT.names <- row.names(obp.qlf.Ovary_BB_v_bb.TT$table)
## Now the GLM version of the analysis
y_obp_glm <- DGEList(x_OBP,samples=glm_design)
## Filter out low count genes
y_obp_glm <- y_obp_glm[keep_OBP, , keep.lib.sizes=FALSE]
### Normalize samples by library size
y_obp_glm <- calcNormFactors(y_obp_glm)
## Begin Computing Differential Expression
y_obp_glm <- estimateDisp(y_obp_glm,glm_design)
y_obp_glm <- estimateGLMCommonDisp(y_obp_glm, glm_design)
y_obp_glm <- estimateGLMTrendedDisp(y_obp_glm, glm_design)
y_obp_glm <- estimateGLMTagwiseDisp(y_obp_glm, glm_design)
## QLM Fit and DE analysis
## Note: For more conservative estimates, use glmQLFTest vs. glmLRT
fit_obp_glm <- glmQLFit(y_obp_glm,glm_design)
## Compute Differential expression based on number of supergene alleles
## DE Analysis for Paper
obp.QLF.BBvBb <- glmQLFTest(fit_obp_glm,coef="GenotypeBb")
obp.QLF.BBvBb_TT <- topTags(obp.QLF.BBvBb,n=Inf)
obp.QLF.BBvBb_TT_sig <- topTags(obp.QLF.BBvBb_TT,p.value = 0.01,n=Inf)$table
obp.QLF.BBvbb <- glmQLFTest(fit_obp_glm,coef="Genotypebb")
obp.QLF.BBvbb_TT <- topTags(obp.QLF.BBvbb,n=Inf)
obp.QLF.BBvbb_TT_sig <- topTags(obp.QLF.BBvbb_TT,p.value = 0.01,n=Inf)$table
## Compute differential expression based on social form
obp.QLF.SF <- glmQLFTest(fit_obp_glm,contrast=makeContrasts(Monogyne - Polygyne, levels=glm_design))
obp.QLF.SF_TT <- topTags(obp.QLF.SF,n=Inf)
obp.QLF.SF_TT_sig <- topTags(obp.QLF.SF_TT,p.value = 0.01,n=Inf)$table
## Reorder the dataframes
obp.qlf.Brain_BB_v_Bb_order <- obp.qlf.Brain_BB_v_Bb.TT$table[ order(row.names(obp.qlf.Brain_BB_v_Bb.TT$table)), ]
obp.qlf.Ovary_BB_v_Bb_order <- obp.qlf.Ovary_BB_v_Bb.TT$table[ order(row.names(obp.qlf.Ovary_BB_v_Bb.TT$table)), ]
obp.qlf.Brain_BB_v_bb_order <- obp.qlf.Brain_BB_v_bb.TT$table[ order(row.names(obp.qlf.Brain_BB_v_bb.TT$table)), ]
obp.qlf.Ovary_BB_v_bb_order <- obp.qlf.Ovary_BB_v_bb.TT$table[ order(row.names(obp.qlf.Ovary_BB_v_bb.TT$table)), ]
obp.qlf.Core_BB_v_Bb_order <- obp.QLF.BBvBb_TT$table[ order(row.names(obp.QLF.BBvBb_TT$table)), ]
obp.qlf.Core_BB_v_bb_order <- obp.QLF.BBvbb_TT$table[ order(row.names(obp.QLF.BBvbb_TT$table)), ]
obp.qlf.SF_order <- obp.QLF.SF_TT$table[ order(row.names(obp.QLF.SF_TT$table)), ]
#### Generate FDR heatmap (Figure S4B)
DEG_heatmap <- data.frame(rep(x = 0, 27))
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Brain_BB_v_Bb_order$FDR)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Ovary_BB_v_Bb_order$FDR)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Brain_BB_v_bb_order$FDR)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Ovary_BB_v_bb_order$FDR)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Core_BB_v_Bb_order$FDR)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Core_BB_v_bb_order$FDR)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.SF_order$FDR)
row.names(DEG_heatmap) <- row.names(obp.qlf.Core_BB_v_bb_order)
DEG_heatmap <- DEG_heatmap[,2:8]
colnames(DEG_heatmap) <- c("Brain_BB_v_Bb","Ovary_BB_v_Bb","Brain_BB_v_bb","Ovary_BB_v_bb","Core_BB_v_Bb","Core_BB_v_bb","Core_SocialForm")
# create color palette
col.pal <- brewer.pal(9,"YlOrRd")
hm.parameters <- list(DEG_heatmap,
color = col.pal,
scale = "none",
kmeans_k = NA,
show_rownames = T, show_colnames = T,breaks=c(0,0.001,0.01,0.05,0.1,0.2,0.5,1),display_numbers=TRUE,number_format="%.3f",
main = "Solenopsis invicta OBP DEG Significance",
clustering_method = "complete",
cluster_rows = FALSE, cluster_cols = FALSE,
clustering_distance_rows = "correlation",
clustering_distance_cols = "correlation")
do.call("pheatmap", hm.parameters)
#### Generate LogFC heatmap (Figure S4A)
DEG_heatmap <- data.frame(rep(x = 0, 27))
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Brain_BB_v_Bb_order$logFC)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Ovary_BB_v_Bb_order$logFC)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Brain_BB_v_bb_order$logFC)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.Ovary_BB_v_bb_order$logFC)
DEG_heatmap <- cbind(DEG_heatmap,-1*obp.qlf.Core_BB_v_Bb_order$logFC)
DEG_heatmap <- cbind(DEG_heatmap,-1*obp.qlf.Core_BB_v_bb_order$logFC)
DEG_heatmap <- cbind(DEG_heatmap,obp.qlf.SF_order$logFC)
row.names(DEG_heatmap) <- row.names(obp.qlf.Core_BB_v_bb_order)
DEG_heatmap <- DEG_heatmap[,2:8]
colnames(DEG_heatmap) <- c("Brain_BB_v_Bb","Ovary_BB_v_Bb","Brain_BB_v_bb","Ovary_BB_v_bb","Core_BB_v_Bb","Core_BB_v_bb","Core_SocialForm")
# create color palette
col.pal <- brewer.pal(9,"RdBu")
hm.parameters <- list(DEG_heatmap,
color = col.pal,
scale = "none",
kmeans_k = NA,
show_rownames = T, show_colnames = T,display_numbers=TRUE,number_format="%.3f",
main = "Solenopsis invicta OBP DEG logFC",
clustering_method = "complete",
cluster_rows = FALSE, cluster_cols = FALSE,
clustering_distance_rows = "correlation",
clustering_distance_cols = "correlation")
do.call("pheatmap", hm.parameters)
save.image(file = "Molecular_Ecology_Markdown.RData")
sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS 10.14.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] rentrez_1.2.2 rtracklayer_1.42.2 readxl_1.3.1
## [4] randtests_1.0 RColorBrewer_1.1-2 CircStats_0.2-6
## [7] boot_1.3-23 MASS_7.3-51.4 pheatmap_1.0.12
## [10] eulerr_6.0.0 data.table_1.12.2 topGO_2.34.0
## [13] SparseM_1.77 GO.db_3.7.0 AnnotationDbi_1.44.0
## [16] Biobase_2.42.0 graph_1.60.0 ggplot2_3.2.1
## [19] cowplot_1.0.0 karyoploteR_1.8.8 regioneR_1.14.0
## [22] GenomicRanges_1.34.0 GenomeInfoDb_1.18.2 IRanges_2.16.0
## [25] S4Vectors_0.20.1 BiocGenerics_0.28.0 gridExtra_2.3
## [28] magick_2.2 readr_1.3.1 UpSetR_1.4.0
## [31] edgeR_3.24.3 limma_3.38.3
##
## loaded via a namespace (and not attached):
## [1] colorspace_1.4-1 biovizBase_1.30.1
## [3] htmlTable_1.13.2 XVector_0.22.0
## [5] base64enc_0.1-3 dichromat_2.0-0
## [7] rstudioapi_0.10 bit64_0.9-7
## [9] fansi_0.4.0 splines_3.5.1
## [11] knitr_1.25 polyclip_1.10-0
## [13] zeallot_0.1.0 jsonlite_1.6
## [15] Formula_1.2-3 Rsamtools_1.34.1
## [17] cluster_2.1.0 compiler_3.5.1
## [19] httr_1.4.1 backports_1.1.4
## [21] assertthat_0.2.1 Matrix_1.2-17
## [23] lazyeval_0.2.2 cli_1.1.0
## [25] formatR_1.7 acepack_1.4.1
## [27] htmltools_0.3.6 prettyunits_1.0.2
## [29] tools_3.5.1 gtable_0.3.0
## [31] glue_1.3.1 GenomeInfoDbData_1.2.0
## [33] reshape2_1.4.3 dplyr_0.8.3
## [35] Rcpp_1.0.2 cellranger_1.1.0
## [37] vctrs_0.2.0 Biostrings_2.50.2
## [39] polylabelr_0.1.0 xfun_0.10
## [41] stringr_1.4.0 ensembldb_2.6.8
## [43] XML_3.98-1.20 zlibbioc_1.28.0
## [45] scales_1.0.0 BSgenome_1.50.0
## [47] VariantAnnotation_1.28.13 hms_0.5.1
## [49] ProtGenerics_1.14.0 SummarizedExperiment_1.12.0
## [51] AnnotationFilter_1.6.0 yaml_2.2.0
## [53] curl_4.2 memoise_1.1.0
## [55] biomaRt_2.38.0 rpart_4.1-15
## [57] latticeExtra_0.6-28 stringi_1.4.3
## [59] RSQLite_2.1.2 checkmate_1.9.4
## [61] GenomicFeatures_1.34.8 BiocParallel_1.16.6
## [63] rlang_0.4.0 pkgconfig_2.0.3
## [65] matrixStats_0.55.0 bitops_1.0-6
## [67] evaluate_0.14 lattice_0.20-38
## [69] purrr_0.3.2 labeling_0.3
## [71] GenomicAlignments_1.18.1 htmlwidgets_1.3
## [73] bit_1.1-14 tidyselect_0.2.5
## [75] plyr_1.8.4 magrittr_1.5
## [77] R6_2.4.0 Hmisc_4.2-0
## [79] DelayedArray_0.8.0 DBI_1.0.0
## [81] pillar_1.4.2 foreign_0.8-72
## [83] withr_2.1.2 survival_2.44-1.1
## [85] RCurl_1.95-4.12 nnet_7.3-12
## [87] tibble_2.1.3 crayon_1.3.4
## [89] utf8_1.1.4 rmarkdown_1.16
## [91] bamsignals_1.14.0 progress_1.2.2
## [93] locfit_1.5-9.1 grid_3.5.1
## [95] blob_1.2.0 digest_0.6.21
## [97] bezier_1.1.2 munsell_0.5.0